{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Before training\n",
    "\n",
    "This program saves the last 3 generations of models to Google Drive. Since 1 generation of models is >1GB, you should have at least 3GB of free space in Google Drive. If you do not have such free space, it is recommended to create another Google Account.\n",
    "\n",
    "Training requires >10GB VRAM. (T4 should be enough) Inference does not require such a lot of VRAM."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Installation"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#@title Check GPU\n",
    "!nvidia-smi"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#@title Mount Google Drive\n",
    "from google.colab import drive\n",
    "drive.mount('/content/drive')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#@title Install dependencies\n",
    "#@markdown pip may fail to resolve dependencies and raise ERROR, but it can be ignored.\n",
    "!python -m pip install -U pip wheel\n",
    "%pip install -U ipython \n",
    "\n",
    "#@markdown Branch (for development)\n",
    "BRANCH = \"none\" #@param {\"type\": \"string\"}\n",
    "if BRANCH == \"none\":\n",
    "    %pip install -U so-vits-svc-fork\n",
    "else:\n",
    "    %pip install -U git+https://github.com/34j/so-vits-svc-fork.git@{BRANCH}"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Training"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#@title Make dataset directory\n",
    "!mkdir -p \"dataset_raw\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#!rm -r \"dataset_raw\"\n",
    "#!rm -r \"dataset/44k\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#@title Copy your dataset\n",
    "#@markdown **We assume that your dataset is in your Google Drive's `so-vits-svc-fork/dataset/(speaker_name)` directory.**\n",
    "DATASET_NAME = \"kiritan\" #@param {type: \"string\"}\n",
    "!cp -R /content/drive/MyDrive/so-vits-svc-fork/dataset/{DATASET_NAME}/ -t \"dataset_raw/\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#@title Download dataset (Tsukuyomi-chan JVS)\n",
    "#@markdown You can download this dataset if you don't have your own dataset.\n",
    "#@markdown Make sure you agree to the license when using this dataset.\n",
    "#@markdown https://tyc.rei-yumesaki.net/material/corpus/#toc6\n",
    "# !wget https://tyc.rei-yumesaki.net/files/sozai-tyc-corpus1.zip\n",
    "# !unzip sozai-tyc-corpus1.zip\n",
    "# !mv \"/content/つくよみちゃんコーパス Vol.1 声優統計コーパス（JVSコーパス準拠）/おまけ：WAV（+12dB増幅＆高音域削減）/WAV（+12dB増幅＆高音域削減）\" \"dataset_raw/tsukuyomi\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#@title Automatic preprocessing\n",
    "!svc pre-resample"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "!svc pre-config"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#@title Copy configs file\n",
    "!cp configs/44k/config.json drive/MyDrive/so-vits-svc-fork"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "F0_METHOD = \"dio\" #@param [\"crepe\", \"crepe-tiny\", \"parselmouth\", \"dio\", \"harvest\"]\n",
    "!svc pre-hubert -fm {F0_METHOD}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#@title Train\n",
    "%load_ext tensorboard\n",
    "%tensorboard --logdir drive/MyDrive/so-vits-svc-fork/logs/44k\n",
    "!svc train --model-path drive/MyDrive/so-vits-svc-fork/logs/44k"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Training Cluster model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "!svc train-cluster --output-path drive/MyDrive/so-vits-svc-fork/logs/44k/kmeans.pt"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Inference"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#@title Get the author's voice as a source\n",
    "import random\n",
    "NAME = str(random.randint(1, 49))\n",
    "TYPE = \"fsd50k\" #@param [\"\", \"digit\", \"dog\", \"fsd50k\"]\n",
    "CUSTOM_FILEPATH = \"\" #@param {type: \"string\"}\n",
    "if CUSTOM_FILEPATH != \"\":\n",
    "    NAME = CUSTOM_FILEPATH\n",
    "else:\n",
    "    # it is extremely difficult to find a voice that can download from the internet directly\n",
    "    if TYPE == \"dog\":\n",
    "        !wget -N f\"https://huggingface.co/datasets/437aewuh/dog-dataset/resolve/main/dogs/dogs_{NAME:.0000}.wav\" -O {NAME}.wav\n",
    "    elif TYPE == \"digit\":\n",
    "        # george, jackson, lucas, nicolas, ...\n",
    "        !wget -N f\"https://github.com/Jakobovski/free-spoken-digit-dataset/raw/master/recordings/0_george_{NAME}.wav\" -O {NAME}.wav\n",
    "    elif TYPE == \"fsd50k\":\n",
    "        !wget -N f\"https://huggingface.co/datasets/Fhrozen/FSD50k/blob/main/clips/dev/{10000+int(NAME)}.wav\" -O {NAME}.wav\n",
    "    else:\n",
    "        !wget -N f\"https://zunko.jp/sozai/utau/voice_{\"kiritan\" if NAME < 25 else \"itako\"}{NAME % 5 + 1}.wav\" -O {NAME}.wav\n",
    "from IPython.display import Audio, display\n",
    "display(Audio(f\"{NAME}.wav\"))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#@title Use trained model\n",
    "#@markdown **Put your .wav file in `so-vits-svc-fork/audio` directory**\n",
    "from IPython.display import Audio, display\n",
    "!svc infer drive/MyDrive/so-vits-svc-fork/audio/{NAME}.wav -m drive/MyDrive/so-vits-svc-fork/logs/44k/ -c drive/MyDrive/so-vits-svc-fork/logs/44k/config.json\n",
    "display(Audio(f\"drive/MyDrive/so-vits-svc-fork/audio/{NAME}.out.wav\", autoplay=True))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "##@title Use trained model (with cluster)\n",
    "!svc infer {NAME}.wav -s speaker -r 0.1 -m drive/MyDrive/so-vits-svc-fork/logs/44k/ -c drive/MyDrive/so-vits-svc-fork/logs/44k/config.json -k drive/MyDrive/so-vits-svc-fork/logs/44k/kmeans.pt\n",
    "display(Audio(f\"{NAME}.out.wav\", autoplay=True))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Pretrained models"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#@title https://huggingface.co/TachibanaKimika/so-vits-svc-4.0-models/tree/main\n",
    "!wget -N \"https://huggingface.co/TachibanaKimika/so-vits-svc-4.0-models/resolve/main/riri/G_riri_220.pth\"\n",
    "!wget -N \"https://huggingface.co/TachibanaKimika/so-vits-svc-4.0-models/resolve/main/riri/config.json\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "!svc infer {NAME}.wav -c config.json -m G_riri_220.pth\n",
    "display(Audio(f\"{NAME}.out.wav\", autoplay=True))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#@title https://huggingface.co/therealvul/so-vits-svc-4.0/tree/main\n",
    "!wget -N \"https://huggingface.co/therealvul/so-vits-svc-4.0/resolve/main/Pinkie%20(speaking%20sep)/G_166400.pth\"\n",
    "!wget -N \"https://huggingface.co/therealvul/so-vits-svc-4.0/resolve/main/Pinkie%20(speaking%20sep)/config.json\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "!svc infer {NAME}.wav --speaker \"Pinkie {neutral}\" -c config.json -m G_166400.pth\n",
    "display(Audio(f\"{NAME}.out.wav\", autoplay=True))"
   ]
  }
 ],
 "metadata": {
  "accelerator": "GPU",
  "colab": {
   "provenance": []
  },
  "gpuClass": "standard",
  "kernelspec": {
   "display_name": "Python 3",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}
